import request

# Address of the mp.weixin.qq.com "profile_ext" page (action=home) that
# home_data() requests. The query string carries the account identifier
# (__biz) together with uin/key/pass_ticket values.
# NOTE(review): key and pass_ticket look like session-bound credentials that
# presumably expire; consider loading them from configuration instead of
# committing them to source -- confirm.
url = 'https://mp.weixin.qq.com/mp/profile_ext?action=home&__biz=MzU1NzEzNjc4MA==&scene=124&uin=OTkzNDc5ODAw&key=d97826e31f41ebf71fcc7f1fb01452b39c510aaf976e81cb4b7f267f5deda75afff4dab3afc5808e9c89bcd60e0fe28ee431c82a030a22bc998f1a54619f09b7784bfd9b882ea3b45f7faf623d961815a98b700ec9693078ac6c53c0d951ad4986d48d9259afa1174a7fa73de3c7a9314109ae4914b84fa8e7d6d72af245105a&devicetype=Windows+10+x64&version=63030073&lang=zh_CN&a8scene=7&pass_ticket=lq8diyWmMMmLjQD5FZfXtu0b2oUhaqd2RSs%2BSLfUeYVTXlVZWCbXo9so8jYNQKTc&fontgear=2'

# Raw HTTP request headers, one "Name: value" pair per line, kept as a single
# multi-line string and handed unchanged to request.request_data() by
# home_data(). The string starts and ends with a newline.
# NOTE(review): presumably request.request_data() parses this text into a
# header mapping -- verify against that module. The Cookie line also embeds
# session values (appmsg_token, pass_ticket, wap_sid2) that likely need to be
# refreshed together with the URL above -- confirm.
headers = '''
Host: mp.weixin.qq.com
Connection: keep-alive
Upgrade-Insecure-Requests: 1
User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36 NetType/WIFI MicroMessenger/7.0.20.1781(0x6700143B) WindowsWechat(0x63030073)
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9
Cookie: rewardsn=; wxtokenkey=777; wxuin=993479800; lang=zh_CN; pgv_info=ssid=s5645063921; pgv_pvid=1676347020; dnfqqcomrouteLine=a20210705care; eas_sid=j1h6e226c537J143S7N3002748; _qpsvr_localtk=0.04996191186518084; appmsg_token=1122_wzuo9h81sn7iE%2FMvtA7_dLW43EUmaoBsr2nARJpwJuT21YX7s-Y0BnRQUwj3TAGDnrgQNvG1QIuYehTB; devicetype=Windows10x64; version=63030073; pass_ticket=lq8diyWmMMmLjQD5FZfXtu0b2oUhaqd2RSs+SLfUeYVTXlVZWCbXo9so8jYNQKTc; wap_sid2=CPiY3dkDErYBeV9IR0pTcWhKdWZIT09qUEFOSUdEanRvZnB3eGN4REhCcTJiVC01YU14SE9ZQ0hqZU16dHVYTWQtWVpGdllieE0tXzlZT3k3eWdzNFF6bllSVjV3dmlrVjhyQ2c5S1F6TVc2VlQzNlM2eGxUelQtSVEzR1pDbjRZVG1mcmV3enlIYWEySUEzYXdQdTd4Zk9rb2pVYmlTNzBVMHo0TE0zalN1ZU83ck8yT0VUY2E0UUJJQUFBfn4wr/HPhwY4DUCVTg==
Sec-Fetch-Site: none
Sec-Fetch-Mode: navigate
Sec-Fetch-User: ?1
Sec-Fetch-Dest: document
Accept-Encoding: gzip, deflate, br
Accept-Language: zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7
'''


def extract_data(html_content):
    """
    Extract the history-article list embedded in a profile page.

    The page source is searched for a JavaScript assignment of the form
    ``msgList = '{...}'``. The quoted value is HTML-unescaped and decoded as
    JSON, and the entry stored under its ``"list"`` key is returned.

    :param html_content: page source code as a string
    :return: the article list found under ``"list"``; an empty list when the
        page contains no ``msgList`` assignment or the decoded payload has no
        (or an empty/null) ``"list"`` entry
    :raises json.JSONDecodeError: if the embedded payload is not valid JSON
    """
    import re
    import html
    import json

    # re.S lets "." cross line breaks in case the payload spans several lines;
    # the lazy ".*?" stops at the first "}'" that closes the quoted object.
    match = re.search(r"msgList = '({.*?})'", html_content, flags=re.S)
    if match is None:
        # Always hand back a list so callers can iterate the result directly.
        return []

    payload = html.unescape(match.group(1))  # e.g. &quot; -> "
    articles = json.loads(payload).get("list")
    return articles or []


def home_data():
    """
    Fetch the configured profile page and collect its article URLs.

    Requests the module-level ``url`` with the module-level ``headers`` via
    ``request.request_data``, extracts the embedded article entries with
    ``extract_data`` and prints each article's ``content_url`` as it is
    collected.

    :return: list of ``content_url`` values, one per article entry; an empty
        list when the page yields no article entries; ``None`` when
        ``request.request_data`` returns ``"-1"``
    :raises KeyError: if an entry lacks ``app_msg_ext_info`` or
        ``content_url``
    """
    page = request.request_data(url, headers)
    # "-1" is presumably the failure marker of request.request_data -- verify
    # against that module.
    if page == "-1":
        return None

    articles = extract_data(page)
    if not articles:
        # No msgList data on the page (or an empty list): nothing to iterate.
        return []

    urls = []
    for item in articles:
        item_url = item['app_msg_ext_info']['content_url']
        print(item_url)
        urls.append(item_url)
    return urls
